#!/usr/bin/perl

=pod

=head1 NAME

apt-mirror - apt sources mirroring tool

=head1 SYNOPSIS

apt-mirror [--[no-]progress] [--verbose] [configfile]

=head1 DESCRIPTION

A small and efficient tool that lets you mirror a part of or
the whole Debian GNU/Linux distribution or any other apt sources.

Main features:

=over

=item *
It uses a config similar to APT's F<sources.list>

=item *
It's fully pool compliant

=item *
It supports multithreaded downloading

=item *
It supports multiple architectures at the same time

=item *
It can automatically remove unneeded files

=item *
It works well on an overloaded Internet connection

=item *
It never produces an inconsistent mirror including while mirroring

=item *
It works on all POSIX compliant systems with Perl and wget

=back

=head1 COMMENTS

apt-mirror uses F</etc/apt/mirror.list> as a configuration file.
By default it is tuned to official Debian or Ubuntu mirrors. Change
it for your needs.

After you setup the configuration file you may run as root:

    # su - apt-mirror -c apt-mirror

Or uncomment the line in F</etc/cron.d/apt-mirror> to enable daily mirror updates.

=head1 FILES

F</etc/apt/mirror.list>
        Main configuration file

F</etc/cron.d/apt-mirror>
        Cron configuration template

F</var/spool/apt-mirror/mirror>
        Mirror places here

F</var/spool/apt-mirror/skel>
        Place for temporarily downloaded indexes

F</var/spool/apt-mirror/var>
        Log files placed here. URLs and MD5 checksums also here.

=head1 CONFIGURATION EXAMPLES

The mirror.list configuration supports many options, the file is well commented explaining each option.
Here are some sample mirror configuration lines showing the various supported ways:

Normal:
deb http://example.com/debian stable main contrib non-free

Arch Specific: (many other architectures are supported)
deb-powerpc http://example.com/debian stable main contrib non-free

HTTP and FTP Auth or non-standard port:
deb http://user:pass@example.com:8080/debian stable main contrib non-free

HTTPS with sending Basic HTTP authentication information (plaintext username and password) for all requests:
(this was default behaviour of Wget 1.10.2 and prior and is needed for some servers with new version of Wget)
set auth_no_challenge 1
deb https://user:pass@example.com:443/debian stable main contrib non-free

HTTPS without checking certificate:
set no_check_certificate 1
deb https://example.com:443/debian stable main contrib non-free

Source Mirroring:
deb-src http://example.com/debian stable main contrib non-free

=head1 AUTHORS

Dmitry N. Hramtsov E<lt>hdn@nsu.ruE<gt>
Brandon Holtsclaw E<lt>me@brandonholtsclaw.comE<gt>

=cut

use warnings;
use strict;
use File::Copy;
use File::Compare;
use File::Path qw(make_path);
use File::Basename;
use Fcntl qw(:flock);
use Getopt::Long;

my $config_file;

my %config_variables = (
    "defaultarch" => `dpkg --print-architecture 2>/dev/null` || 'i386',
    "nthreads"    => 20,
    "base_path"   => '/var/spool/apt-mirror',
    "mirror_path" => '$base_path/mirror',
    "skel_path"   => '$base_path/skel',
    "var_path"    => '$base_path/var',
    "cleanscript" => '$var_path/clean.sh',
    "_contents"   => 1,
    "_autoclean"  => 0,
    "_tilde"      => 0,
    "limit_rate"  => '100m',
    "run_postmirror"       => 1,
    "auth_no_challenge"    => 0,
    "no_check_certificate" => 0,
    "no_verbose"           => 0,
    "unlink"               => 0,
    "postmirror_script"    => '$var_path/postmirror.sh',
    "use_proxy"            => 'off',
    "http_proxy"           => '',
    "https_proxy"          => '',
    "proxy_user"           => '',
    "proxy_password"       => ''
);

my @config_binaries = ();
my @config_sources  = ();

my @index_urls;
my @childrens       = ();
my %skipclean       = ();
my %clean_directory = ();
my $verbose         = 0;
my $progress        = 1;

######################################################################################
## Setting up $config_file variable

$config_file = "/etc/apt/mirror.list";    # Default value
GetOptions('verbose|v+', \$verbose,
           'progress|p!', \$progress,
    ) or die "Usage: apt-mirror [--verbose] [--[no-]progress] [configfile]\n";

if ( $_ = shift )
{
    die("apt-mirror: invalid config file specified") unless -e $_;
    $config_file = $_;
}

chomp $config_variables{"defaultarch"};

######################################################################################
## Common subroutines

sub round_number
{
    my $n = shift;
    my $minus = $n < 0 ? '-' : '';
    $n = abs($n);
    $n = int( ( $n + .05 ) * 10 ) / 10;
    $n .= '.0' unless $n =~ /\./;
    $n .= '0' if substr( $n, ( length($n) - 1 ), 1 ) eq '.';
    chop $n if $n =~ /\.\d\d0$/;
    return "$minus$n";
}

sub format_bytes
{
    my $bytes     = shift;
    my $bytes_out = '0';
    my $size_name = 'bytes';
    my $KiB       = 1024;
    my $MiB       = 1024 * 1024;
    my $GiB       = 1024 * 1024 * 1024;

    if ( $bytes >= $KiB )
    {
        $bytes_out = $bytes / $KiB;
        $size_name = 'KiB';
        if ( $bytes >= $MiB )
        {
            $bytes_out = $bytes / $MiB;
            $size_name = 'MiB';
            if ( $bytes >= $GiB )
            {
                $bytes_out = $bytes / $GiB;
                $size_name = 'GiB';
            }
        }
        $bytes_out = round_number($bytes_out);
    }
    else
    {
        $bytes_out = $bytes;
        $size_name = 'bytes';
    }

    return "$bytes_out $size_name";
}

sub get_variable
{
    my $value = $config_variables{ shift @_ };
    my $count = 16;
    while ( $value =~ s/\$(\w+)/$config_variables{$1}/xg )
    {
        die("apt-mirror: too many substitution while evaluating variable") if ( $count-- ) < 0;
    }
    return $value;
}

sub quoted_path
{
    my $path = shift;
    $path =~ s/'/'\\''/g;
    return "'" . $path . "'";
}

sub lock_aptmirror
{
    open( LOCK_FILE, '>', get_variable("var_path") . "/apt-mirror.lock" );
    my $lock = flock( LOCK_FILE, LOCK_EX | LOCK_NB );
    if ( !$lock )
    {
        die("apt-mirror is already running, exiting");
    }
}

sub unlock_aptmirror
{
    close(LOCK_FILE);
    unlink( get_variable("var_path") . "/apt-mirror.lock" );
}

sub download_urls
{
    my $stage = shift;
    my @urls;
    my $i = 0;
    my $pid;
    my $nthreads = get_variable("nthreads");
    my @args     = ();
    local $| = 1;

    @urls = @_;
    $nthreads = @urls if @urls < $nthreads;

    if ( get_variable("auth_no_challenge") == 1 )    { push( @args, "--auth-no-challenge" ); }
    if ( get_variable("no_check_certificate") == 1 ) { push( @args, "--no-check-certificate" ); }
    if ( get_variable("no_verbose") == 1 )           { push( @args, "--no-verbose" ); }
    if ( get_variable("unlink") == 1 )               { push( @args, "--unlink" ); }
    if ( length( get_variable("use_proxy") ) && ( get_variable("use_proxy") eq 'yes' || get_variable("use_proxy") eq 'on' ) )
    {
        if ( length( get_variable("http_proxy") ) || length( get_variable("https_proxy") ) ) { push( @args, "-e use_proxy=yes" ); }
        if ( length( get_variable("http_proxy") ) ) { push( @args, "-e http_proxy=" . get_variable("http_proxy") ); }
        if ( length( get_variable("https_proxy") ) ) { push( @args, "-e https_proxy=" . get_variable("https_proxy") ); }
        if ( length( get_variable("proxy_user") ) ) { push( @args, "-e proxy_user=" . get_variable("proxy_user") ); }
        if ( length( get_variable("proxy_password") ) ) { push( @args, "-e proxy_password=" . get_variable("proxy_password") ); }
    }
    print "Downloading " . scalar(@urls) . " $stage files using $nthreads threads...\n";

    while ( scalar @urls )
    {
        my @part = splice( @urls, 0, int( @urls / $nthreads ) );
        open URLS, ">" . get_variable("var_path") . "/$stage-urls.$i" or die("apt-mirror: can't write to intermediate file ($stage-urls.$i)");
        foreach (@part) { print URLS "$_\n"; }
        close URLS or die("apt-mirror: can't close intermediate file ($stage-urls.$i)");
        if ($verbose >= 2) {
            print join("\n  ", "Downloading batch $i:", @part), "\n";
        }
        $pid = fork();

        die("apt-mirror: can't do fork in download_urls") if !defined($pid);

        if ( $pid == 0 )
        {
            system('wget', '--no-cache',
                   '--limit-rate=' . get_variable("limit_rate"),
                   '-t', '5', '-r', '-N', '-l', 'inf',
                   '-o', get_variable("var_path") . "/$stage-log.$i",
                   '-i', get_variable("var_path") . "/$stage-urls.$i", @args);
            die("Could not run wget, please make sure it is installed and in your path\n")
                if $? == -1;
            my $exit_status = $? >> 8;
            if ( $exit_status ) {
                if ( $stage eq 'archive' || $exit_status < 8 ) {
                    # wget exit code 8 is "server error response".  This is normal for downloading indices where we try
                    # to download .gz, .xz and .bz2 and don't care about some of them returning 404.  For package
                    # download, this is probably a real problem which should be investigated.
                    print "wget failed with exit status $exit_status.  See ",
                        get_variable("var_path") . "/$stage-log.$i", " for details.\n";
                    exit ($exit_status);
                }
            }
            # child is done
            exit (0);
        }

        push @childrens, $pid;
        $i++;
        $nthreads--;
    }

    print "Begin time: " . localtime() . "\n[" . scalar(@childrens) . "]... "
        if $progress;
    while ( scalar @childrens )
    {
        my $dead = wait();
        @childrens = grep { $_ != $dead } @childrens;
        print "[" . scalar(@childrens) . "]... "
            if $progress;
    }
    print "\nEnd time: " . localtime() . "\n\n"
        if $progress;
}

## Parse config

sub parse_config_line
{
    my $pattern_deb_line = qr/^[\t ]*(?<type>deb-src|deb)(?:-(?<arch>[\w\-]+))?[\t ]+(?:\[(?<options>[^\]]+)\][\t ]+)?(?<uri>[^\s]+)[\t ]+(?<components>.+)$/;
    my $line = $_;
    my %config;
    if ( $line =~ $pattern_deb_line ) {
        $config{'type'} = $+{type};
        $config{'arch'} = $+{arch};
        $config{'options'} = $+{options} ? $+{options} : "";
        $config{'uri'} = $+{uri};
        $config{'components'} = $+{components};
        if ( $config{'options'} =~ /arch=((?<arch>[\w\-]+)[,]*)/g ) {
            $config{'arch'} = $+{arch};
        }
        $config{'components'} = [ split /\s+/, $config{'components'} ];
    } elsif ( $line =~ /set[\t ]+(?<key>[^\s]+)[\t ]+(?<value>"[^"]+"|'[^']+'|[^\s]+)/ ) {
        $config{'type'} = 'set';
        $config{'key'} = $+{key};
        $config{'value'} = $+{value};
        $config{'value'} =~ s/^'(.*)'$/$1/;
        $config{'value'} =~ s/^"(.*)"$/$1/;
    } elsif ( $line =~ /(?<type>clean|skip-clean)[\t ]+(?<uri>[^\s]+)/ ) {
        $config{'type'} = $+{type};
        $config{'uri'} = $+{uri};
    }

    return %config;
}

open CONFIG, "<$config_file" or die("apt-mirror: can't open config file ($config_file)");
while (<CONFIG>)
{
    next if /^\s*#/;
    next unless /\S/;
    my $line = $_;
    my %config_line = parse_config_line;

    if ( $config_line{'type'} eq "set" ) {
        $config_variables{ $config_line{'key'} } = $config_line{'value'};
        next;
    } elsif ( $config_line{'type'} eq "deb" ) {
        my $arch = $config_line{'arch'};
        $arch = get_variable("defaultarch") if ! defined $config_line{'arch'};
        push @config_binaries, [ $arch, $config_line{'uri'}, @{$config_line{'components'}} ];
        next;
    } elsif ( $config_line{'type'} eq "deb-src" ) {
        push @config_sources, [ $config_line{'uri'}, @{$config_line{'components'}} ];
        next;
    } elsif ( $config_line{'type'} =~ /(skip-clean|clean)/ ) {
        my $link = $config_line{'uri'};
        $link =~ s[^(\w+)://][];
        $link =~ s[/$][];
        $link =~ s[~][%7E]g if get_variable("_tilde");
        if ( $config_line{'type'} eq "skip-clean" ) {
            $skipclean{ $link } = 1;
        } elsif ( $config_line{'type'} eq "clean" ) {
            $clean_directory{ $link } = 1;
        }
        next;
    }

    die("apt-mirror: invalid line in config file ($.: $line ...)");
}
close CONFIG;

die("Please explicitly specify 'defaultarch' in mirror.list") unless get_variable("defaultarch");

######################################################################################
## Create the 3 needed directories if they don't exist yet
my @needed_directories = ( get_variable("mirror_path"), get_variable("skel_path"), get_variable("var_path") );
foreach my $needed_directory (@needed_directories)
{
    unless ( -d $needed_directory )
    {
        make_path($needed_directory) or die("apt-mirror: can't create $needed_directory directory");
    }
}
#
#######################################################################################

lock_aptmirror();

######################################################################################
## Skel download

my %urls_to_download = ();
my ( $url, $arch );

sub remove_double_slashes
{
    local $_ = shift;
    while (s[/\./][/]g)                { }
    while (s[(?<!:)//][/]g)            { }
    while (s[(?<!:/)/[^/]+/\.\./][/]g) { }
    s/~/\%7E/g if get_variable("_tilde");
    return $_;
}

sub add_url_to_download
{
    my $url = remove_double_slashes(shift);
    $urls_to_download{$url} = shift;
}

foreach (@config_sources)
{
    my ( $uri, $distribution, @components ) = @{$_};

    if (@components)
    {
        $url = $uri . "/dists/" . $distribution . "/";

        add_url_to_download( $url . "InRelease" );
        add_url_to_download( $url . "Release" );
        add_url_to_download( $url . "Release.gpg" );
        foreach (@components)
        {
            add_url_to_download( $url . $_ . "/source/Release" );
            add_url_to_download( $url . $_ . "/source/Sources.gz" );
            add_url_to_download( $url . $_ . "/source/Sources.bz2" );
            add_url_to_download( $url . $_ . "/source/Sources.xz" );
        }
    }
    else
    {
        add_url_to_download( $uri . "/$distribution/InRelease" );
        add_url_to_download( $uri . "/$distribution/Release" );
        add_url_to_download( $uri . "/$distribution/Release.gpg" );
        add_url_to_download( $uri . "/$distribution/Sources.gz" );
        add_url_to_download( $uri . "/$distribution/Sources.bz2" );
        add_url_to_download( $uri . "/$distribution/Sources.xz" );
    }
}

foreach (@config_binaries)
{
    my ( $arch, $uri, $distribution, @components ) = @{$_};

    if (@components)
    {
        $url = $uri . "/dists/" . $distribution . "/";

        add_url_to_download( $url . "InRelease" );
        add_url_to_download( $url . "Release" );
        add_url_to_download( $url . "Release.gpg" );
        if ( get_variable("_contents") )
        {
            add_url_to_download( $url . "Contents-" . $arch . ".gz" );
            add_url_to_download( $url . "Contents-" . $arch . ".bz2" );
            add_url_to_download( $url . "Contents-" . $arch . ".xz" );
        }
        foreach (@components)
        {
            if ( get_variable("_contents") )
            {
                add_url_to_download( $url . $_ . "/Contents-" . $arch . ".gz" );
                add_url_to_download( $url . $_ . "/Contents-" . $arch . ".bz2" );
                add_url_to_download( $url . $_ . "/Contents-" . $arch . ".xz" );
            }
            add_url_to_download( $url . $_ . "/binary-" . $arch . "/Release" );
            add_url_to_download( $url . $_ . "/binary-" . $arch . "/Packages" );
            add_url_to_download( $url . $_ . "/binary-" . $arch . "/Packages.gz" );
            add_url_to_download( $url . $_ . "/binary-" . $arch . "/Packages.bz2" );
            add_url_to_download( $url . $_ . "/binary-" . $arch . "/Packages.xz" );
            add_url_to_download( $url . $_ . "/i18n/Index" );
        }
    }
    else
    {
        add_url_to_download( $uri . "/$distribution/InRelease" );
        add_url_to_download( $uri . "/$distribution/Release" );
        add_url_to_download( $uri . "/$distribution/Release.gpg" );
        add_url_to_download( $uri . "/$distribution/Packages" );
        add_url_to_download( $uri . "/$distribution/Packages.gz" );
        add_url_to_download( $uri . "/$distribution/Packages.bz2" );
        add_url_to_download( $uri . "/$distribution/Packages.xz" );
    }
}

chdir get_variable("skel_path") or die("apt-mirror: can't chdir to skel");
@index_urls = sort keys %urls_to_download;
download_urls( "index", @index_urls );

foreach ( keys %urls_to_download )
{
    s[^(\w+)://][];
    s[~][%7E]g if get_variable("_tilde");
    $skipclean{$_} = 1;
    $skipclean{$_} = 1 if s[\.gz$][];
    $skipclean{$_} = 1 if s[\.bz2$][];
    $skipclean{$_} = 1 if s[\.xz$][];
}

######################################################################################
## Translation index download

%urls_to_download = ();

sub sanitise_uri
{
    my $uri = shift;
    $uri =~ s[^(\w+)://][];
    $uri =~ s/^([^@]+)?@?// if $uri =~ /@/;
    $uri =~ s/~/\%7E/g if get_variable("_tilde");
    return $uri;
}

sub find_translation_files_in_release
{
    # Look in the dists/$DIST/Release file for the translation files that belong
    # to the given component.

    my $dist_uri  = shift;
    my $component = shift;
    my ( $release_uri, $release_path, $line ) = '';

    $release_uri  = $dist_uri . "Release";
    $release_path = get_variable("skel_path") . "/" . sanitise_uri($release_uri);

    unless ( open STREAM, "<$release_path" )
    {
        warn( "Failed to open Release file from " . $release_uri );
        return;
    }

    my $checksums = 0;
    while ( $line = <STREAM> )
    {
        chomp $line;
        if ($checksums)
        {
            if ( $line =~ /^ +(.*)$/ )
            {
                my @parts = split( / +/, $1 );
                if ( @parts == 3 )
                {
                    my ( $sha1, $size, $filename ) = @parts;
                    if ( $filename =~ m{^$component/i18n/Translation-[^./]*\.(bz2|xz)$} )
                    {
                        add_url_to_download( $dist_uri . $filename, $size );
                    }
                }
                else
                {
                    warn("Malformed checksum line \"$1\" in $release_uri");
                }
            }
            else
            {
                $checksums = 0;
            }
        }
        if ( not $checksums )
        {
            if ( $line eq "SHA256:" )
            {
                $checksums = 1;
            }
        }
    }
}

sub process_translation_index
{
    # Extract all translation files from the dists/$DIST/$COMPONENT/i18n/Index
    # file. Fall back to parsing dists/$DIST/Release if i18n/Index is not found.

    my $dist_uri  = remove_double_slashes(shift);
    my $component = shift;
    my ( $base_uri, $index_uri, $index_path, $line ) = '';

    $base_uri   = $dist_uri . $component . "/i18n/";
    $index_uri  = $base_uri . "Index";
    $index_path = get_variable("skel_path") . "/" . sanitise_uri($index_uri);

    unless ( open STREAM, "<$index_path" )
    {
        find_translation_files_in_release( $dist_uri, $component );
        return;
    }

    my $checksums = 0;
    while ( $line = <STREAM> )
    {
        chomp $line;
        if ($checksums)
        {
            if ( $line =~ /^ +(.*)$/ )
            {
                my @parts = split( / +/, $1 );
                if ( @parts == 3 )
                {
                    my ( $sha1, $size, $filename ) = @parts;
                    add_url_to_download( $base_uri . $filename, $size );
                }
                else
                {
                    warn("Malformed checksum line \"$1\" in $index_uri");
                }
            }
            else
            {
                $checksums = 0;
            }
        }
        if ( not $checksums )
        {
            if ( $line eq "SHA256:" or $line eq "SHA1:" or $line eq "MD5Sum:" )
            {
                $checksums = 1;
            }
        }
    }

    close STREAM;
}

print "Processing translation indexes: ["
    if $progress;

foreach (@config_binaries)
{
    my ( $arch, $uri, $distribution, @components ) = @{$_};
    print "T" if $progress;
    if (@components)
    {
        $url = $uri . "/dists/" . $distribution . "/";

        my $component;
        foreach $component (@components)
        {
            process_translation_index( $url, $component );
        }
    }
}

print "]\n\n" if $progress;

push( @index_urls, sort keys %urls_to_download );
download_urls( "translation", sort keys %urls_to_download );

foreach ( keys %urls_to_download )
{
    s[^(\w+)://][];
    s[~][%7E]g if get_variable("_tilde");
    $skipclean{$_} = 1;
}

######################################################################################
## DEP-11 index download

%urls_to_download = ();

sub find_dep11_files_in_release
{
    # Look in the dists/$DIST/Release file for the DEP-11 files that belong
    # to the given component and architecture.

    my $dist_uri  = shift;
    my $component = shift;
    my $arch      = shift;
    my ( $release_uri, $release_path, $line ) = '';

    $release_uri  = $dist_uri . "Release";
    $release_path = get_variable("skel_path") . "/" . sanitise_uri($release_uri);

    unless ( open STREAM, "<$release_path" )
    {
        warn( "Failed to open Release file from " . $release_uri );
        return;
    }

    my $checksums = 0;
    while ( $line = <STREAM> )
    {
        chomp $line;
        if ($checksums)
        {
            if ( $line =~ /^ +(.*)$/ )
            {
                my @parts = split( / +/, $1 );
                if ( @parts == 3 )
                {
                    my ( $sha1, $size, $filename ) = @parts;
                    if ( $filename =~ m{^$component/dep11/(Components-${arch}\.yml|icons-(.*)+\.tar)\.(gz|bz2|xz)$} )
                    {
                        add_url_to_download( $dist_uri . $filename, $size );
                    }
                }
                else
                {
                    warn("Malformed checksum line \"$1\" in $release_uri");
                }
            }
            else
            {
                $checksums = 0;
            }
        }
        if ( not $checksums )
        {
            if ( $line eq "SHA256:" )
            {
                $checksums = 1;
            }
        }
    }
}

print "Processing DEP-11 indexes: ["
    if $progress;

foreach (@config_binaries)
{
    my ( $arch, $uri, $distribution, @components ) = @{$_};
    print "D" if $progress;
    if (@components)
    {
        $url = $uri . "/dists/" . $distribution . "/";

        my $component;
        foreach $component (@components)
        {
            find_dep11_files_in_release( $url, $component, $arch );
        }
    }
}

print "]\n\n" if $progress;

push( @index_urls, sort keys %urls_to_download );
download_urls( "dep11", sort keys %urls_to_download );

foreach ( keys %urls_to_download )
{
    s[^(\w+)://][];
    s[~][%7E]g if get_variable("_tilde");
    $skipclean{$_} = 1;
}

######################################################################################
## Main download preparations

%urls_to_download = ();

open FILES_ALL, ">" . get_variable("var_path") . "/ALL" or die("apt-mirror: can't write to intermediate file (ALL)");
open FILES_NEW, ">" . get_variable("var_path") . "/NEW" or die("apt-mirror: can't write to intermediate file (NEW)");
open FILES_MD5, ">" . get_variable("var_path") . "/MD5" or die("apt-mirror: can't write to intermediate file (MD5)");
open FILES_SHA1, ">" . get_variable("var_path") . "/SHA1" or die("apt-mirror: can't write to intermediate file (SHA1)");
open FILES_SHA256, ">" . get_variable("var_path") . "/SHA256" or die("apt-mirror: can't write to intermediate file (SHA256)");

my %stat_cache = ();

sub _stat
{
    my ($filename) = shift;
    return @{ $stat_cache{$filename} } if exists $stat_cache{$filename};
    my @res = stat($filename);
    $stat_cache{$filename} = \@res;
    return @res;
}

sub clear_stat_cache
{
    %stat_cache = ();
}

sub need_update
{
    my $filename       = shift;
    my $size_on_server = shift;

    my ( undef, undef, undef, undef, undef, undef, undef, $size ) = _stat($filename);

    return 1 unless ($size);
    return 0 if $size_on_server == $size;
    return 1;
}

sub remove_spaces($)
{
    my $hashref = shift;
    foreach ( keys %{$hashref} )
    {
        while ( substr( $hashref->{$_}, 0, 1 ) eq ' ' )
        {
            substr( $hashref->{$_}, 0, 1 ) = '';
        }
    }
}

sub process_index
{
    my $uri   = shift;
    my $index = shift;
    my ( $path, $package, $mirror, $files ) = '';

    $path = sanitise_uri($uri);
    local $/ = "\n\n";
    $mirror = get_variable("mirror_path") . "/" . $path;

    if (-e "$path/$index.gz" )
    {
        system("gunzip < $path/$index.gz > $path/$index");
    }
    elsif (-e "$path/$index.xz" )
    {
        system("xz -d < $path/$index.xz > $path/$index");
    }
    elsif (-e "$path/$index.bz2" )
    {
        system("bzip2 -d < $path/$index.bz2 > $path/$index");
    }

    unless ( open STREAM, "<$path/$index" )
    {
        warn("apt-mirror: can't open index $path/$index in process_index");
        return;
    }

    while ( $package = <STREAM> )
    {
        local $/ = "\n";
        chomp $package;
        my ( undef, %lines ) = split( /^([\w\-]+:)/m, $package );

        $lines{"Directory:"} = "" unless defined $lines{"Directory:"};
        chomp(%lines);
        remove_spaces( \%lines );

        if ( exists $lines{"Filename:"} )
        {    # Packages index
            $skipclean{ remove_double_slashes( $path . "/" . $lines{"Filename:"} ) } = 1;
            print FILES_ALL remove_double_slashes( $path . "/" . $lines{"Filename:"} ) . "\n";
            print "ALL: " . remove_double_slashes( $path . "/" . $lines{"Filename:"} ) . "\n"
                if $verbose >= 3;
            print FILES_MD5 $lines{"MD5sum:"} . "  " . remove_double_slashes( $path . "/" . $lines{"Filename:"} ) . "\n" if defined $lines{"MD5sum:"};
            print FILES_SHA1 $lines{"SHA1:"} . "  " . remove_double_slashes( $path . "/" . $lines{"Filename:"} ) . "\n" if defined $lines{"SHA1:"};
            print FILES_SHA256 $lines{"SHA256:"} . "  " . remove_double_slashes( $path . "/" . $lines{"Filename:"} ) . "\n" if defined $lines{"SHA256:"};
            if ( need_update( $mirror . "/" . $lines{"Filename:"}, $lines{"Size:"} ) )
            {
                print FILES_NEW remove_double_slashes( $uri . "/" . $lines{"Filename:"} ) . "\n";
                print "NEW: " . remove_double_slashes( $uri . "/" . $lines{"Filename:"} ) . "\n"
                    if $verbose >= 1;
                add_url_to_download( $uri . "/" . $lines{"Filename:"}, $lines{"Size:"} );
            }
        }
        else
        {    # Sources index
            foreach ( split( /\n/, $lines{"Files:"} ) )
            {
                next if $_ eq '';
                my @file = split;
                die("apt-mirror: invalid Sources format") if @file != 3;
                $skipclean{ remove_double_slashes( $path . "/" . $lines{"Directory:"} . "/" . $file[2] ) } = 1;
                print FILES_ALL remove_double_slashes( $path . "/" . $lines{"Directory:"} . "/" . $file[2] ) . "\n";
                print "ALL: " . remove_double_slashes( $path . "/" . $lines{"Directory:"} . "/" . $file[2] ) . "\n"
                    if $verbose >= 3;
                print FILES_MD5 $file[0] . "  " . remove_double_slashes( $path . "/" . $lines{"Directory:"} . "/" . $file[2] ) . "\n";
                if ( need_update( $mirror . "/" . $lines{"Directory:"} . "/" . $file[2], $file[1] ) )
                {
                    print FILES_NEW remove_double_slashes( $uri . "/" . $lines{"Directory:"} . "/" . $file[2] ) . "\n";
                    print "NEW: " . remove_double_slashes( $uri . "/" . $lines{"Directory:"} . "/" . $file[2] ) . "\n"
                        if $verbose >= 1;
                    add_url_to_download( $uri . "/" . $lines{"Directory:"} . "/" . $file[2], $file[1] );
                }
            }
        }
    }

    close STREAM;
}

print "Processing indexes: ["
    if $progress;

foreach (@config_sources)
{
    my ( $uri, $distribution, @components ) = @{$_};
    print "S" if $progress;
    if (@components)
    {
        my $component;
        foreach $component (@components)
        {
            process_index( $uri, "/dists/$distribution/$component/source/Sources" );
        }
    }
    else
    {
        process_index( $uri, "/$distribution/Sources" );
    }
}

foreach (@config_binaries)
{
    my ( $arch, $uri, $distribution, @components ) = @{$_};
    print "P" if $progress;
    if (@components)
    {
        my $component;
        foreach $component (@components)
        {
            process_index( $uri, "/dists/$distribution/$component/binary-$arch/Packages" );
        }
    }
    else
    {
        process_index( $uri, "/$distribution/Packages" );
    }
}

clear_stat_cache();

print "]\n\n"
    if $progress;

close FILES_ALL;
close FILES_NEW;
close FILES_MD5;
close FILES_SHA1;
close FILES_SHA256;

######################################################################################
## Main download

chdir get_variable("mirror_path") or die("apt-mirror: can't chdir to mirror");

my $need_bytes = 0;
foreach ( values %urls_to_download )
{
    $need_bytes += $_;
}

my $size_output = format_bytes($need_bytes);

(my $size_mirror) = $size_output =~ /\A([^:\s]+)/;
my $directory = get_variable("mirror_path");
my $command = "df /var/spool/apt-mirror/mirror/ | awk -F'[^0-9]*' 'NR==2 {print \$5}'";
(my $directory_size) = format_bytes(`$command`*1000) =~ /\A([^:\s]+)/;

if ($directory_size <= $size_mirror) {
	warn("apt-mirror: need space $size_output\n");
}
else 
{
	print "$size_output will be downloaded into archive.\n";

	download_urls( "archive", sort keys %urls_to_download );

	######################################################################################
	## Copy skel to main archive

	sub copy_file
	{
		my ( $from, $to ) = @_;
		my $dir = dirname($to);
		return unless -f $from;
		make_path($dir) unless -d $dir;
		if ( get_variable("unlink") == 1 )
		{
			if ( compare( $from, $to ) != 0 ) { unlink($to); }
		}
		unless ( copy( $from, $to ) )
		{
			warn("apt-mirror: can't copy $from to $to");
			return;
		}
		my ( $atime, $mtime ) = ( stat($from) )[ 8, 9 ];
		utime( $atime, $mtime, $to ) or die("apt-mirror: can't utime $to");
	}

	foreach (@index_urls)
	{
		die("apt-mirror: invalid url in index_urls") unless s[^(\w+)://][];
		copy_file( get_variable("skel_path") . "/" . sanitise_uri("$_"), get_variable("mirror_path") . "/" . sanitise_uri("$_") );
		copy_file( get_variable("skel_path") . "/" . sanitise_uri("$_"), get_variable("mirror_path") . "/" . sanitise_uri("$_") ) if (s/\.gz$//);
		copy_file( get_variable("skel_path") . "/" . sanitise_uri("$_"), get_variable("mirror_path") . "/" . sanitise_uri("$_") ) if (s/\.bz2$//);
		copy_file( get_variable("skel_path") . "/" . sanitise_uri("$_"), get_variable("mirror_path") . "/" . sanitise_uri("$_") ) if (s/\.xz$//);
	}
}
######################################################################################
## Make cleaning script

my ( @rm_dirs, @rm_files ) = ();
my $unnecessary_bytes = 0;

sub process_symlink
{
    return 1;    # symlinks are always needed
}

sub process_file
{
    my $file = shift;
    $file =~ s[~][%7E]g if get_variable("_tilde");
    return 1 if $skipclean{$file};
    push @rm_files, sanitise_uri($file);
    my ( undef, undef, undef, undef, undef, undef, undef, $size, undef, undef, undef, undef, $blocks ) = stat($file);
    $unnecessary_bytes += $blocks * 512;
    return 0;
}

sub process_directory
{
    my $dir       = shift;
    my $is_needed = 0;
    return 1 if $skipclean{$dir};
    opendir( my $dir_h, $dir ) or die "apt-mirror: can't opendir $dir: $!";
    foreach ( grep { !/^\.$/ && !/^\.\.$/ } readdir($dir_h) )
    {
        my $item = $dir . "/" . $_;
        $is_needed |= process_directory($item) if -d $item && !-l $item;
        $is_needed |= process_file($item)      if -f $item;
        $is_needed |= process_symlink($item)   if -l $item;
    }
    closedir $dir_h;
    push @rm_dirs, $dir unless $is_needed;
    return $is_needed;
}

chdir get_variable("mirror_path") or die("apt-mirror: can't chdir to mirror");

foreach ( keys %clean_directory )
{
    process_directory($_) if -d $_ && !-l $_;
}

open CLEAN, ">" . get_variable("cleanscript") or die("apt-mirror: can't open clean script file");

my ( $i, $total ) = ( 0, scalar @rm_files );

if ( get_variable("_autoclean") )
{

    my $size_output = format_bytes($unnecessary_bytes);
    print "$size_output in $total files and " . scalar(@rm_dirs) . " directories will be freed...";

    chdir get_variable("mirror_path") or die("apt-mirror: can't chdir to mirror");

    foreach (@rm_files) { unlink $_; }
    foreach (@rm_dirs)  { rmdir $_; }

}
else
{

    my $size_output = format_bytes($unnecessary_bytes);
    print "$size_output in $total files and " . scalar(@rm_dirs) . " directories can be freed.\n";
    print "Run " . get_variable("cleanscript") . " for this purpose.\n\n";

    print CLEAN "#!/bin/sh\n";
    print CLEAN "set -e\n\n";
    print CLEAN "cd " . quoted_path(get_variable("mirror_path")) . "\n\n";
    print CLEAN "echo 'Removing $total unnecessary files [$size_output]...'\n";
    foreach (@rm_files)
    {
        print CLEAN "rm -f '$_'\n";
        print "  $_\n" if $verbose >= 1;
        print CLEAN "echo -n '[" . int( 100 * $i / $total ) . "\%]'\n" unless $i % 500;
        print CLEAN "echo -n .\n" unless $i % 10;
        $i++;
    }
    print CLEAN "echo 'done.'\n";
    print CLEAN "echo\n\n";

    $i     = 0;
    $total = scalar @rm_dirs;
    print CLEAN "echo 'Removing $total unnecessary directories...'\n";
    foreach (@rm_dirs)
    {
        print CLEAN "if test -d '$_'; then rm -r '$_'; fi\n";
        print CLEAN "echo -n '[" . int( 100 * $i / $total ) . "\%]'\n" unless $i % 50;
        print CLEAN "echo -n .\n";
        $i++;
    }
    print CLEAN "echo 'done.'\n";
    print CLEAN "echo\n";

    close CLEAN;

}

# Make clean script executable
my $perm = ( stat get_variable("cleanscript") )[2] & 07777;
chmod( $perm | 0111, get_variable("cleanscript") );

if ( get_variable("run_postmirror") )
{
    print "Running the Post Mirror script ...\n";
    print "(" . get_variable("postmirror_script") . ")\n\n";
    if ( -x get_variable("postmirror_script") )
    {
        system( get_variable("postmirror_script"), '' );
    }
    else
    {
        system( '/bin/sh', get_variable("postmirror_script") );
    }
    print "\nPost Mirror script has completed. See above output for any possible errors.\n\n";
}

unlock_aptmirror();
